Begin

Read in libraries and 2016-2019 data; 2015 UPC uses quadrats and is inconsistent with later methods.

#to load data make sure directory is set to knit from project directory
knitr::opts_chunk$set(message=FALSE, warning=FALSE)

# Libraries
library(tidyverse)
library(ggplot2)
library(viridis)
library(reshape2)
library(janitor)
library(magrittr)
library(vegan)

#base.dir <- "/Users/ole.shelton/GitHub/OCNMS"
base.dir <- "/Users/jameal.samhouri/Dropbox/Projects/In progress/OCNMS/OCNMS"
data.dir <- paste0(base.dir,"/Data/CSV_2015_on")
data.summary.output.dir <- paste0(base.dir,"/Data/Summarized data")
setwd(data.dir)

# 2015 data. tricky because in 2015 we took % cover data in quadrats, and that included things that we/PISCO now call substrate but also that we/PISCO now call % cover. exclude for now
# dat_2015 <- clean_names(read.csv("2015_OCNMSDataComplete_standardized_122116.csv"), case = "all_caps") %>%
#   remove_empty(c("rows", "cols")) 
# glimpse(dat_2015)
# head(dat_2015)
# upc_2015 <- dat_2015 %>% filter(PISCO_DATATYPE=="UPC")
# head(upc_2015)


dat.2016.on.upc <- clean_names(read.csv("NWFSC_UPC_ALLYEARS_data_2019.csv"), case = "all_caps") %>%
  remove_empty(c("rows", "cols")) 
# fix data entry issues
dat.2016.on.upc %<>% mutate(
  CLASSCODE = toupper(CLASSCODE)
)
unique(dat.2016.on.upc$CLASSCODE)
##  [1] "BEDRK"     "BOULD"     "SAND"      "0-10CM"    "10CM-1M"   "BARSAN"   
##  [7] "LEAFYRED"  "CRUCOR"    "ZOSTERA"   "BROWNUN"   "PTERO"     "COB"      
## [13] "MACPYR_HF" "HYDROID"   "NEREO_HF"  "1M-2M"     "LACY"      "SHELL"    
## [19] "BRANCH"    "BARROC"    "ARTCOR"    "CUPCOR"    "BARNAC"    "BRYO"     
## [25] ">2M"       "SPONGE"    "SOLTUN"    "BIVALVE"   "LAMHOLD"   "PHYSPP"   
## [31] "SCUM"      "COLTUN"    "BUSHY"     "TUBEWORM"  "ENCRED"    "SCALLOP"  
## [37] "URCHIN"    "HYDROCOR"  "GREEN"     "CUCSPP"    "ANEM"
unique(dat.2016.on.upc$SITE)
## [1] Neah Bay           Destruction Island Cape Johnson       Tatoosh Island    
## [5] Cape Alava        
## 5 Levels: Cape Alava Cape Johnson Destruction Island ... Tatoosh Island
glimpse(dat.2016.on.upc)
## Rows: 6,570
## Columns: 17
## $ ENTRY_ORDER <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ YEAR        <int> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 201…
## $ MONTH       <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, …
## $ DAY         <int> 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 1…
## $ SITE        <fct> Neah Bay, Neah Bay, Neah Bay, Neah Bay, Neah Bay, Neah Ba…
## $ OBSERVER    <fct> Kinsey Frick, Kinsey Frick, Kinsey Frick, Kinsey Frick, K…
## $ BUDDY       <fct> Steve Lonhart, Steve Lonhart, Steve Lonhart, Steve Lonhar…
## $ SIDE        <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZONE        <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, …
## $ TRANSECT    <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ HEADING     <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ DEPTH_FT    <int> 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1…
## $ SEGMENT     <fct> 0-10m, 0-10m, 0-10m, 0-10m, 0-10m, 0-10m, 0-10m, 0-10m, 0…
## $ CATEGORY    <fct> SUBSTRATE, SUBSTRATE, SUBSTRATE, RELIEF, RELIEF, COVER, C…
## $ CLASSCODE   <chr> "BEDRK", "BOULD", "SAND", "0-10CM", "10CM-1M", "BARSAN", …
## $ COUNT       <int> 2, 2, 6, 9, 1, 5, 3, 1, 1, 6, 4, 4, 6, 3, 3, 2, 1, 1, 2, …
## $ NOTES       <fct> , , , , , , , , , , , , , , , , , , , , , , , , ,
substrate.codes <- clean_names(read.csv("substrate_codes.csv"), case = "all_caps") %>%
  remove_empty(c("rows", "cols")) # js checked on 072720 and codes are same in 2019 data
  
substrate <- dat.2016.on.upc %>% filter(CATEGORY=="SUBSTRATE")
#relief <- dat.2016.on.upc %>% filter(CATEGORY=="RELIEF")
#cover <- dat.2016.on.upc %>% filter(CATEGORY=="COVER")

Analyze substrate data

Summarise raw data into data frames that: 1) sum segments for each transect. 2) calculate summary stats for site-year-zone. 3) calculate summary stats for site-year. 4) calculate summary stats for site-zone. 5) calculate summary stats for site.

# Pad with zero observations. i.e., complete the data so that each segment has all 4 substrate categories
  cat <- data.frame(merge(unique(substrate$SITE),unique(substrate$CLASSCODE))); colnames(cat) <- c("SITE","CLASSCODE")
  dat.sub <- substrate %>% 
    group_by(YEAR,SITE,SEGMENT,TRANSECT,SIDE,ZONE,OBSERVER) %>% 
    summarise(a=length(SEGMENT)) %>% 
    full_join(.,cat,by="SITE") %>% 
    dplyr::select(-a) %>% 
    left_join(.,substrate)
  dat.sub$COUNT[is.na(dat.sub$COUNT)==TRUE] <- 0
  
  # order sites from south to north
  dat.sub$SITE <- factor(dat.sub$SITE,
               levels=c("Destruction Island","Cape Johnson","Cape Alava","Tatoosh Island","Neah Bay"))
  
  # make year a factor
  dat.sub$YEAR <- as.factor(dat.sub$YEAR)
  
  # not sure if we have to account for SIDE 
  # (eg, multiple observers on a segment)? looks like no.
  tmp<-substrate %>% 
    group_by(YEAR,SITE,SEGMENT,TRANSECT,SIDE,ZONE,OBSERVER) %>% 
    summarise(a=length(SEGMENT), b=length(OBSERVER))
  which(tmp$a != tmp$b) # integer(0)
## integer(0)
  # with(dat.sub, table(SITE, ZONE, TRANSECT, SIDE))
  # unique(dat.sub$SIDE)
  # length(which(dat.sub$SIDE != 1))/dim(dat.sub)[1]
  
  # but it does look like there are some issues with duplicate entries. so fix those
  dat.sub <- dat.sub %>% 
    group_by(YEAR,SITE,ZONE,SIDE,TRANSECT) %>%
    distinct(CLASSCODE, SEGMENT, COUNT, .keep_all = TRUE) %>% # remove duplicate entries based on CLASSCODE, SEGMENT, COUNT
    mutate(
      TOTAL_COUNT = sum(COUNT),
      PROPORTION = COUNT/TOTAL_COUNT
    )
  unique(dat.sub$TOTAL_COUNT) # 30 29
## [1] 30 29
  View(dat.sub[which(dat.sub$TOTAL_COUNT !=30),])
  glimpse(dat.sub)
## Rows: 3,324
## Columns: 19
## Groups: YEAR, SITE, ZONE, SIDE, TRANSECT [277]
## $ YEAR        <fct> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 201…
## $ SITE        <fct> Cape Alava, Cape Alava, Cape Alava, Cape Alava, Cape Alav…
## $ SEGMENT     <fct> 0-10m, 0-10m, 0-10m, 0-10m, 0-10m, 0-10m, 0-10m, 0-10m, 0…
## $ TRANSECT    <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, …
## $ SIDE        <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZONE        <int> 5, 5, 5, 5, 10, 10, 10, 10, 5, 5, 5, 5, 10, 10, 10, 10, 5…
## $ OBSERVER    <fct> Kelly Andrews, Kelly Andrews, Kelly Andrews, Kelly Andrew…
## $ CLASSCODE   <chr> "BEDRK", "BOULD", "SAND", "COB", "BEDRK", "BOULD", "SAND"…
## $ ENTRY_ORDER <int> NA, 1037, NA, 1038, 883, 884, 886, 885, 1058, 1059, NA, 1…
## $ MONTH       <int> NA, 8, NA, 8, 8, 8, 8, 8, 8, 8, NA, 8, 8, 8, 8, 8, NA, 8,…
## $ DAY         <int> NA, 10, NA, 10, 10, 10, 10, 10, 10, 10, NA, 10, 10, 10, 1…
## $ BUDDY       <fct> NA, Steve Lonhart, NA, Steve Lonhart, Greg Williams, Greg…
## $ HEADING     <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ DEPTH_FT    <int> NA, 22, NA, 22, 31, 31, 31, 31, 16, 16, NA, 16, 26, 26, 2…
## $ CATEGORY    <fct> NA, SUBSTRATE, NA, SUBSTRATE, SUBSTRATE, SUBSTRATE, SUBST…
## $ COUNT       <dbl> 0, 7, 0, 3, 2, 5, 1, 2, 1, 7, 0, 2, 7, 1, 1, 1, 0, 8, 0, …
## $ NOTES       <fct> NA, , NA, , , , , , , , NA, , , , , , NA, , NA, , , , , ,…
## $ TOTAL_COUNT <dbl> 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 3…
## $ PROPORTION  <dbl> 0.00000000, 0.23333333, 0.00000000, 0.10000000, 0.0666666…
  # create summary df. 1) sum segments for each transect. 2) calculate summary stats for site-year-zone. 3) calculate summary stats for site-year. 4) calculate summary stats for site-zone. 5) calculate summary stats for site
  #proportion data so use binomial distribution for summary stats. https://sites.warnercnr.colostate.edu/gwhite/wp-content/uploads/sites/73/2017/04/BinomialDistribution.pdf
  
  # 1a) sum segments for each transect to get total proportion of each substrate CLASSCODE
  sub.summary.year.transect <- dat.sub %>% 
    group_by(YEAR,SITE,ZONE,SIDE,TRANSECT,CLASSCODE) %>%
    dplyr::summarise(
      PROPORTION=sum(PROPORTION),
      PROPORTION_2=sum(PROPORTION)^2,
      .groups = "keep"
    )
  sub.summary.year.transect$YEAR <- as.factor(sub.summary.year.transect$YEAR)
  # order sites from south to north
  sub.summary.year.transect$SITE <- factor(sub.summary.year.transect$SITE,
                                       levels=c("Destruction Island","Cape Johnson","Cape Alava","Tatoosh Island","Neah Bay"))
  glimpse(sub.summary.year.transect)
## Rows: 1,108
## Columns: 8
## Groups: YEAR, SITE, ZONE, SIDE, TRANSECT, CLASSCODE [1,108]
## $ YEAR         <fct> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 20…
## $ SITE         <fct> Destruction Island, Destruction Island, Destruction Isla…
## $ ZONE         <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, …
## $ SIDE         <fct> 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,…
## $ TRANSECT     <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1,…
## $ CLASSCODE    <chr> "BEDRK", "BOULD", "COB", "SAND", "BEDRK", "BOULD", "COB"…
## $ PROPORTION   <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,…
## $ PROPORTION_2 <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,…
  # 1b) calculate substrate diversity for each transect
  sub.summary.year.transect.div <- sub.summary.year.transect %>%
    #dat.sub %>%
    #filter(!is.na(COUNT)) %>%
    #pivot_wider(names_from = CLASSCODE, values_from = COUNT, values_fill = list(COUNT=0)) 
  #sub.summary.year.transect.div2 <- sub.summary.year.transect.div1 %>%
    group_by(YEAR,SITE,ZONE,SIDE,TRANSECT) %>%
    dplyr::summarise(
      DIVERSITY= sum(PROPORTION_2),
      SUBSTRATE_DIVERSITY= 1 - DIVERSITY, # simpsons diversity
      #SUBSTRATE_DIVERSITY=diversity(sub.summary.year.transect.div1[,c('BEDRK', 'BOULD', 'SAND', 'COB')], index = "simpson"),
      .groups = "keep"
    ) %>%
    select(-DIVERSITY)
  glimpse(sub.summary.year.transect.div)
## Rows: 277
## Columns: 6
## Groups: YEAR, SITE, ZONE, SIDE, TRANSECT [277]
## $ YEAR                <fct> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2…
## $ SITE                <fct> Destruction Island, Destruction Island, Destructi…
## $ ZONE                <int> 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 5, 5, 5, 5, 5…
## $ SIDE                <fct> 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1…
## $ TRANSECT            <int> 1, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 6, 1…
## $ SUBSTRATE_DIVERSITY <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0…
  # ggplot(data=sub.summary.year.transect.div) +
  #   geom_density(aes(SUBSTRATE_DIVERSITY)) +
  #   theme_bw()
  
  # 2a) calculate summary stats for site-year-zone
  sub.summary.year.zone <- sub.summary.year.transect %>% 
    group_by(YEAR,SITE,ZONE,CLASSCODE) %>%
    dplyr::summarise(
      N=length(PROPORTION),
      MEAN=mean(PROPORTION),
      SD=sqrt(MEAN * (1 - MEAN) * N),
      SE=SD/sqrt(N),
      .groups = "keep"
      )
  #sub.summary.year.zone$YEAR <- as.factor(sub.summary.year.zone$YEAR)
  # order sites from south to north
  sub.summary.year.zone$SITE <- factor(sub.summary.year.zone$SITE,
                         levels=c("Destruction Island","Cape Johnson","Cape Alava","Tatoosh Island","Neah Bay"))
  glimpse(sub.summary.year.zone)
## Rows: 156
## Columns: 8
## Groups: YEAR, SITE, ZONE, CLASSCODE [156]
## $ YEAR      <fct> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,…
## $ SITE      <fct> Destruction Island, Destruction Island, Destruction Island,…
## $ ZONE      <int> 5, 5, 5, 5, 10, 10, 10, 10, 5, 5, 5, 5, 10, 10, 10, 10, 5, …
## $ CLASSCODE <chr> "BEDRK", "BOULD", "COB", "SAND", "BEDRK", "BOULD", "COB", "…
## $ N         <int> 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ MEAN      <dbl> 1.00000000, 0.00000000, 0.00000000, 0.00000000, 0.91666667,…
## $ SD        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.6770032, 0.47…
## $ SE        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.2763854, 0.19…
  # 2b) calculate substrate diversity for each site-year-zone
  sub.summary.year.zone.div <- sub.summary.year.transect.div %>% 
    group_by(YEAR,SITE,ZONE) %>%
    dplyr::summarise(
      MEAN=mean(SUBSTRATE_DIVERSITY),
      SD= sd(SUBSTRATE_DIVERSITY),
      N=length(SUBSTRATE_DIVERSITY),
      SE=SD/sqrt(N),
      .groups = "keep"
      )
  glimpse(sub.summary.year.zone.div)
## Rows: 39
## Columns: 7
## Groups: YEAR, SITE, ZONE [39]
## $ YEAR <fct> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2017…
## $ SITE <fct> Destruction Island, Destruction Island, Cape Johnson, Cape Johns…
## $ ZONE <int> 5, 10, 5, 10, 5, 10, 5, 10, 5, 10, 5, 5, 10, 5, 10, 5, 10, 5, 10…
## $ MEAN <dbl> 0.00000000, 0.13222222, 0.43259259, 0.56185185, 0.46333333, 0.52…
## $ SD   <dbl> 0.00000000, 0.19848687, 0.09957606, 0.09986081, 0.13923069, 0.11…
## $ N    <int> 4, 6, 6, 6, 6, 6, 6, 2, 4, 6, 8, 8, 8, 8, 12, 6, 7, 8, 8, 8, 7, …
## $ SE   <dbl> 0.00000000, 0.08103192, 0.04065175, 0.04076800, 0.05684069, 0.04…
  # 3a) calculate summary stats for site-year
  sub.summary.year <- sub.summary.year.transect %>% 
    group_by(YEAR,SITE,CLASSCODE) %>%
    dplyr::summarise(
      N=length(PROPORTION),
      MEAN=mean(PROPORTION),
      SD=sqrt(MEAN * (1 - MEAN) * N),
      SE=SD/sqrt(N),
      .groups = "keep"
    )
  #sub.summary.year$YEAR <- as.factor(sub.summary.year$YEAR)
  sub.summary.year$SITE <- factor(sub.summary.year$SITE,
                                       levels=c("Destruction Island","Cape Johnson","Cape Alava","Tatoosh Island","Neah Bay"))
  glimpse(sub.summary.year)
## Rows: 80
## Columns: 7
## Groups: YEAR, SITE, CLASSCODE [80]
## $ YEAR      <fct> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,…
## $ SITE      <fct> Destruction Island, Destruction Island, Destruction Island,…
## $ CLASSCODE <chr> "BEDRK", "BOULD", "COB", "SAND", "BEDRK", "BOULD", "COB", "…
## $ N         <int> 10, 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 8, 8, 8, 8,…
## $ MEAN      <dbl> 0.950000000, 0.023333333, 0.013333333, 0.013333333, 0.22777…
## $ SD        <dbl> 0.6892024, 0.4773771, 0.3627059, 0.3627059, 1.4528389, 1.72…
## $ SE        <dbl> 0.2179449, 0.1509599, 0.1146977, 0.1146977, 0.4193985, 0.49…
    # 3b) calculate substrate diversity for each site-year-zone
  sub.summary.year.div <- sub.summary.year.transect.div %>% 
    group_by(YEAR,SITE) %>%
    dplyr::summarise(
      MEAN=mean(SUBSTRATE_DIVERSITY),
      SD= sd(SUBSTRATE_DIVERSITY),
      N=length(SUBSTRATE_DIVERSITY),
      SE=SD/sqrt(N),
      .groups = "keep"
      )
  glimpse(sub.summary.year.div)
## Rows: 20
## Columns: 6
## Groups: YEAR, SITE [20]
## $ YEAR <fct> 2016, 2016, 2016, 2016, 2016, 2017, 2017, 2017, 2017, 2017, 2018…
## $ SITE <fct> Destruction Island, Cape Johnson, Cape Alava, Tatoosh Island, Ne…
## $ MEAN <dbl> 0.079333333, 0.497222222, 0.495185185, 0.008055556, 0.319777778,…
## $ SD   <dbl> 0.16293956, 0.11660412, 0.12499098, 0.02278455, 0.19935823, 0.06…
## $ N    <int> 10, 12, 12, 8, 10, 8, 16, 20, 13, 16, 15, 15, 16, 15, 15, 16, 15…
## $ SE   <dbl> 0.051526013, 0.033660710, 0.036081789, 0.008055556, 0.063042608,…
  # 4a) calculate summary stats for site-zone
  sub.summary.zone <- sub.summary.year.transect %>% 
    group_by(SITE,ZONE,CLASSCODE) %>%
    dplyr::summarise(
      N=length(PROPORTION),
      MEAN=mean(PROPORTION),
      SD=sqrt(MEAN * (1 - MEAN) * N),
      SE=SD/sqrt(N),
      .groups = "keep"
    )
  sub.summary.zone$SITE <- factor(sub.summary.zone$SITE,
                                  levels=c("Destruction Island","Cape Johnson","Cape Alava","Tatoosh Island","Neah Bay"))
  glimpse(sub.summary.zone)
## Rows: 40
## Columns: 7
## Groups: SITE, ZONE, CLASSCODE [40]
## $ SITE      <fct> Destruction Island, Destruction Island, Destruction Island,…
## $ ZONE      <int> 5, 5, 5, 5, 10, 10, 10, 10, 5, 5, 5, 5, 10, 10, 10, 10, 5, …
## $ CLASSCODE <chr> "BEDRK", "BOULD", "COB", "SAND", "BEDRK", "BOULD", "COB", "…
## $ N         <int> 28, 28, 28, 28, 21, 21, 21, 21, 30, 30, 30, 30, 28, 28, 28,…
## $ MEAN      <dbl> 0.978571429, 0.016666667, 0.003571429, 0.001190476, 0.92380…
## $ SD        <dbl> 0.7662525, 0.6774134, 0.3156626, 0.1824655, 1.2157694, 1.09…
## $ SE        <dbl> 0.14480811, 0.12801910, 0.05965462, 0.03448273, 0.26530263,…
  # 4b) calculate substrate diversity for each site-year-zone
  sub.summary.zone.div <- sub.summary.year.transect.div %>% 
    group_by(SITE,ZONE) %>%
    dplyr::summarise(
      MEAN=mean(SUBSTRATE_DIVERSITY),
      SD= sd(SUBSTRATE_DIVERSITY),
      N=length(SUBSTRATE_DIVERSITY),
      SE=SD/sqrt(N),
      .groups = "keep"
      )
  glimpse(sub.summary.zone.div)
## Rows: 10
## Columns: 6
## Groups: SITE, ZONE [10]
## $ SITE <fct> Destruction Island, Destruction Island, Cape Johnson, Cape Johns…
## $ ZONE <int> 5, 10, 5, 10, 5, 10, 5, 10, 5, 10
## $ MEAN <dbl> 0.03841270, 0.09015873, 0.37051852, 0.48865079, 0.38822222, 0.37…
## $ SD   <dbl> 0.07484746, 0.16802053, 0.18821284, 0.16207428, 0.16936226, 0.17…
## $ N    <int> 28, 21, 30, 28, 30, 34, 28, 22, 26, 30
## $ SE   <dbl> 0.01414484, 0.03666509, 0.03436281, 0.03062916, 0.03092118, 0.02…
  # 5a) calculate summary stats for site
  sub.summary.site <- sub.summary.year.transect %>% 
    group_by(SITE,CLASSCODE) %>%
    dplyr::summarise(
      N=length(PROPORTION),
      MEAN=mean(PROPORTION),
      SD=sqrt(MEAN * (1 - MEAN) * N),
      SE=SD/sqrt(N),
      .groups = "keep"
    )
  sub.summary.site$SITE <- factor(sub.summary.site$SITE,
                                  levels=c("Destruction Island","Cape Johnson","Cape Alava","Tatoosh Island","Neah Bay"))
  glimpse(sub.summary.site)
## Rows: 20
## Columns: 6
## Groups: SITE, CLASSCODE [20]
## $ SITE      <fct> Destruction Island, Destruction Island, Destruction Island,…
## $ CLASSCODE <chr> "BEDRK", "BOULD", "COB", "SAND", "BEDRK", "BOULD", "COB", "…
## $ N         <int> 49, 49, 49, 49, 58, 58, 58, 58, 64, 64, 64, 64, 50, 50, 50,…
## $ MEAN      <dbl> 0.955102041, 0.035374150, 0.006122449, 0.003401361, 0.09712…
## $ SD        <dbl> 1.4495601, 1.2930654, 0.5460433, 0.4075534, 2.2552578, 3.70…
## $ SE        <dbl> 0.20708001, 0.18472363, 0.07800618, 0.05822191, 0.29612986,…
  # 5b) calculate substrate diversity for each site-year-zone
  sub.summary.site.div <- sub.summary.year.transect.div %>% 
    group_by(SITE) %>%
    dplyr::summarise(
      MEAN=mean(SUBSTRATE_DIVERSITY),
      SD= sd(SUBSTRATE_DIVERSITY),
      N=length(SUBSTRATE_DIVERSITY),
      SE=SD/sqrt(N),
      .groups = "keep"
      )
  glimpse(sub.summary.site.div)
## Rows: 5
## Columns: 5
## Groups: SITE [5]
## $ SITE <fct> Destruction Island, Cape Johnson, Cape Alava, Tatoosh Island, Ne…
## $ MEAN <dbl> 0.06058957, 0.42754789, 0.38218750, 0.12031111, 0.35751241
## $ SD   <dbl> 0.1248339, 0.1844216, 0.1707970, 0.1808465, 0.2140417
## $ N    <int> 49, 58, 64, 50, 56
## $ SE   <dbl> 0.01783341, 0.02421575, 0.02134963, 0.02557556, 0.02860253
  #### END BASIC SUBSTRATE SUMMARIES.

Write out the data frames we just made.

### WRITE OUT SUBSTRATE DATA FRAMES
  
  
  write_csv(sub.summary.year.transect, paste0(data.summary.output.dir, "/SUBSTRATE summary by YEAR-SITE-DEPTH-SIDE-TRANSECT 2016-2019.csv"))
  write_rds(sub.summary.year.transect, paste0(data.summary.output.dir, "/SUBSTRATE summary by YEAR-SITE-DEPTH-SIDE-TRANSECT 2016-2019.rds"))
  write_csv(sub.summary.year.transect.div, paste0(data.summary.output.dir, "/SUBSTRATE diversity summary by YEAR-SITE-DEPTH-SIDE-TRANSECT 2016-2019.csv"))
  write_rds(sub.summary.year.transect.div, paste0(data.summary.output.dir, "/SUBSTRATE summary by YEAR-SITE-DEPTH-SIDE-TRANSECT 2016-2019.rds"))
  
  write_csv(sub.summary.year.zone, paste0(data.summary.output.dir, "/SUBSTRATE summary by YEAR-SITE-DEPTH 2016-2019.csv"))
  write_rds(sub.summary.year.zone, paste0(data.summary.output.dir, "/SUBSTRATE summary by YEAR-SITE-DEPTH 2016-2019.rds"))
  write_csv(sub.summary.year.zone.div, paste0(data.summary.output.dir, "/SUBSTRATE diversity summary by YEAR-SITE-DEPTH 2016-2019.csv"))
  write_rds(sub.summary.year.zone.div, paste0(data.summary.output.dir, "/SUBSTRATE summary by YEAR-SITE-DEPTH 2016-2019.rds"))
  
  write_csv(sub.summary.year, paste0(data.summary.output.dir, "/SUBSTRATE summary by YEAR-SITE 2016-2019.csv"))
  write_rds(sub.summary.year, paste0(data.summary.output.dir, "/SUBSTRATE summary by YEAR-SITE 2016-2019.rds"))
  write_csv(sub.summary.year.div, paste0(data.summary.output.dir, "/SUBSTRATE diversity summary by YEAR-SITE 2016-2019.csv"))
  write_rds(sub.summary.year.div, paste0(data.summary.output.dir, "/SUBSTRATE summary by YEAR-SITE 2016-2019.rds"))
  
  write_csv(sub.summary.zone, paste0(data.summary.output.dir, "/SUBSTRATE summary by SITE-DEPTH 2016-2019.csv"))
  write_rds(sub.summary.zone, paste0(data.summary.output.dir, "/SUBSTRATE summary by SITE-DEPTH 2016-2019.rds"))
  write_csv(sub.summary.zone.div, paste0(data.summary.output.dir, "/SUBSTRATE diversity summary by SITE-DEPTH 2016-2019.csv"))
  write_rds(sub.summary.zone.div, paste0(data.summary.output.dir, "/SUBSTRATE summary by SITE-DEPTH 2016-2019.rds"))
  
  write_csv(sub.summary.site, paste0(data.summary.output.dir, "/SUBSTRATE summary by SITE 2016-2019.csv"))
  write_rds(sub.summary.site, paste0(data.summary.output.dir, "/SUBSTRATE summary by SITE 2016-2019.rds"))
  write_csv(sub.summary.site.div, paste0(data.summary.output.dir, "/SUBSTRATE diversity summary by SITE 2016-2019.csv"))
  write_rds(sub.summary.site.div, paste0(data.summary.output.dir, "/SUBSTRATE diversity summary by SITE 2016-2019.rds"))
  
  
  #### END WRITING OUT SUBSTRATE DATA FRAMES

Make substrate plots

1) plots for site-year-zone.

  # a) substrate type on x axis, site as facets
  sub.plot.year.zone_sub <- ggplot() +
    geom_jitter(data=sub.summary.year.transect,aes(y=PROPORTION,x=CLASSCODE,colour=YEAR),alpha=0.5, position=position_dodge(width=0.5))+
    geom_point(data=sub.summary.year.zone,aes(y=MEAN,x=CLASSCODE,fill=YEAR),color="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year.zone,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=CLASSCODE, colour=YEAR),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    facet_grid(SITE~ZONE) +
    theme_bw()
 plot(sub.plot.year.zone_sub)

  # b) site on x axis, substrate type as facets
  sub.plot.year.zone_site <- ggplot() +
    geom_jitter(data=sub.summary.year.transect,aes(y=PROPORTION,x=SITE, colour=YEAR),alpha=0.5, position=position_dodge(width=0.5) )+
    geom_point(data=sub.summary.year.zone,aes(y=MEAN,x=SITE,fill=YEAR),color="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year.zone,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=SITE, colour=YEAR),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    facet_grid(CLASSCODE~ZONE) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust=1)
    )
  plot(sub.plot.year.zone_site)

  sub.plot.year.zone_site_div <- ggplot() +
    geom_jitter(data=sub.summary.year.transect.div,aes(y=SUBSTRATE_DIVERSITY,x=SITE, colour=YEAR),alpha=0.5, position=position_dodge(width=0.5) )+
    geom_point(data=sub.summary.year.zone.div,aes(y=MEAN,x=SITE,fill=YEAR),color="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year.zone.div,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=SITE, colour=YEAR),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    facet_grid(.~ZONE) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust=1)
    )
  plot(sub.plot.year.zone_site_div)

  # c) compare zones, years as facets, lines connecting 5m v 10m
  
  sub.plot.year.zone_compare1 <- ggplot() +
    geom_jitter(data=sub.summary.year.transect,aes(y=PROPORTION,x=ZONE, colour=SITE),alpha=0.5, position=position_dodge(width=0.5))+
    geom_point(data=sub.summary.year.zone,aes(y=MEAN,x=ZONE,colour=SITE),size=2, position=position_dodge(width=0.5))+
    geom_line(data=sub.summary.year.zone,aes(y=MEAN,x=ZONE,colour=SITE),size=1)+
    geom_errorbar(data=sub.summary.year.zone,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=ZONE,colour=SITE),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    facet_grid(YEAR~CLASSCODE) +
    theme_bw()
  plot(sub.plot.year.zone_compare1)

  # d) compare zones, CLASSCODE as facets, no lines connecting 5m v 10m
  sub.plot.year.zone_compare2 <- ggplot() +
    geom_jitter(data=sub.summary.year.transect,aes(y=PROPORTION,x=ZONE,colour=YEAR),alpha=0.3 , position=position_dodge(width=0.5))+
    geom_point(data=sub.summary.year.zone,aes(y=MEAN,x=ZONE,fill=YEAR),size=2,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year.zone,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=ZONE,colour=YEAR),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d() +
    scale_fill_viridis_d() +
    facet_grid(CLASSCODE~SITE) +
    theme_bw()
  plot(sub.plot.year.zone_compare2)

  sub.plot.year.zone_compare2_div <- ggplot() +
    geom_jitter(data=sub.summary.year.transect.div,aes(y=SUBSTRATE_DIVERSITY,x=ZONE,colour=YEAR),alpha=0.3 , position=position_dodge(width=0.5))+
    geom_point(data=sub.summary.year.zone.div,aes(y=MEAN,x=ZONE,fill=YEAR),size=2,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year.zone.div,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=ZONE,colour=YEAR),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d() +
    scale_fill_viridis_d() +
    facet_grid(.~SITE) +
    theme_bw()
  plot(sub.plot.year.zone_compare2_div)

2) plots for site-year.

# a) substrate type on x axis, site as facets
  sub.plot.year_sub <- ggplot() +
    geom_point(data=sub.summary.year,aes(y=MEAN,x=CLASSCODE,fill=YEAR),colour="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=CLASSCODE, colour=YEAR),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    facet_grid(SITE~.) +
    theme_bw()
  plot(sub.plot.year_sub)

  # b) site on x axis, substrate type as facets
  sub.plot.year_site <- ggplot() +
    geom_point(data=sub.summary.year,aes(y=MEAN,x=SITE,fill=YEAR),color="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=SITE, colour=YEAR),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    facet_grid(CLASSCODE~.) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust=1)
    )
  plot(sub.plot.year_site)

  sub.plot.year_site_div <- ggplot() +
    geom_point(data=sub.summary.year.div,aes(y=MEAN,x=SITE,fill=YEAR),color="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year.div,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=SITE, colour=YEAR),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    #facet_grid(CLASSCODE~.) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust=1)
    )
  plot(sub.plot.year_site_div)

  # c) time series, CLASSCODE as facets
  
  sub.plot.year_ts1 <- ggplot() +
    geom_point(data=sub.summary.year,aes(y=MEAN,x=YEAR,colour=SITE, group=SITE),size=2, position=position_dodge(width=0.5))+
    geom_line(data=sub.summary.year,aes(y=MEAN,x=YEAR,colour=SITE, group=SITE),size=1, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=YEAR,colour=SITE, group=SITE),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    facet_grid(.~CLASSCODE) +
    theme_bw()
  plot(sub.plot.year_ts1)

  # d) time series, SITE as facets
  
  sub.plot.year_ts2 <- ggplot() +
    geom_point(data=sub.summary.year,aes(y=MEAN,x=YEAR,colour=CLASSCODE, group=CLASSCODE),size=2, position=position_dodge(width=0.5))+
    geom_line(data=sub.summary.year,aes(y=MEAN,x=YEAR,colour=CLASSCODE, group=CLASSCODE),size=1, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=YEAR,colour=CLASSCODE, group=CLASSCODE),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    facet_grid(.~SITE) +
    theme_bw()
  plot(sub.plot.year_ts2)

  sub.plot.year_ts2_div <- ggplot() +
    geom_point(data=sub.summary.year.div,aes(y=MEAN,x=YEAR,colour=SITE, group=SITE),size=2, position=position_dodge(width=0.5))+
    geom_line(data=sub.summary.year.div,aes(y=MEAN,x=YEAR,colour=SITE, group=SITE),size=1, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.year.div,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=YEAR,colour=SITE, group=SITE),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    #facet_grid(.~SITE) +
    theme_bw()
  plot(sub.plot.year_ts2_div)

3) plots for site-zone.

  # a) substrate type on x axis, site as facets
  sub.plot.zone_sub <- ggplot() +
    geom_point(data=sub.summary.zone,aes(y=MEAN,x=CLASSCODE,fill=factor(ZONE)),colour="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.zone,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=CLASSCODE, colour=factor(ZONE)),width=0.1, position=position_dodge(width=0.5))  +
    labs(fill='ZONE', colour='ZONE') +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    facet_grid(SITE~.) +
    theme_bw()
  plot(sub.plot.zone_sub)

  # b) site on x axis, substrate type as facets
  sub.plot.zone_site <- ggplot() +
    geom_point(data=sub.summary.zone,aes(y=MEAN,x=SITE,fill=factor(ZONE)),color="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.zone,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=SITE, colour=factor(ZONE)),width=0.1, position=position_dodge(width=0.5))  +
    labs(fill='ZONE', colour='ZONE') +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    facet_grid(CLASSCODE~.) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust=1)
    )
  plot(sub.plot.zone_site)

  sub.plot.zone_site_div <- ggplot() +
    geom_point(data=sub.summary.zone.div,aes(y=MEAN,x=SITE,fill=factor(ZONE)),color="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.zone.div,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=SITE, colour=factor(ZONE)),width=0.1, position=position_dodge(width=0.5))  +
    labs(fill='ZONE', colour='ZONE') +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    #facet_grid(CLASSCODE~.) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust=1)
    )
  plot(sub.plot.zone_site_div)

  # c) compare zones, years as facets, lines connecting 5m v 10m
  
  sub.plot.zone_compare1 <- ggplot() +
    geom_point(data=sub.summary.zone,aes(y=MEAN,x=factor(ZONE),colour=SITE, group=SITE),size=2, position=position_dodge(width=0.5))+
    geom_line(data=sub.summary.zone,aes(y=MEAN,x=factor(ZONE),colour=SITE, group=SITE),size=1, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.zone,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=factor(ZONE),colour=SITE, group=SITE),width=0.1, position=position_dodge(width=0.5))  +
    xlab("ZONE") +
    scale_colour_viridis_d()+
    facet_grid(.~CLASSCODE) +
    theme_bw()
  plot(sub.plot.zone_compare1)

  sub.plot.zone_compare1_div <- ggplot() +
    geom_point(data=sub.summary.zone.div,aes(y=MEAN,x=factor(ZONE),colour=SITE, group=SITE),size=2, position=position_dodge(width=0.5))+
    geom_line(data=sub.summary.zone.div,aes(y=MEAN,x=factor(ZONE),colour=SITE, group=SITE),size=1, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.zone.div,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=factor(ZONE),colour=SITE, group=SITE),width=0.1, position=position_dodge(width=0.5))  +
    xlab("ZONE") +
    scale_colour_viridis_d()+
    #facet_grid(.~CLASSCODE) +
    theme_bw()
  plot(sub.plot.zone_compare1_div)

  # d) compare zones, CLASSCODE as facets, no lines connecting 5m v 10m
  
  sub.plot.zone_compare2 <- ggplot() +
    geom_point(data=sub.summary.zone,aes(y=MEAN,x=factor(ZONE),colour=CLASSCODE, group=CLASSCODE),size=2, position=position_dodge(width=0.5))+
    geom_line(data=sub.summary.zone,aes(y=MEAN,x=factor(ZONE),colour=CLASSCODE, group=CLASSCODE),size=1, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.zone,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=factor(ZONE),colour=CLASSCODE, group=CLASSCODE),width=0.1, position=position_dodge(width=0.5))  +
    xlab("ZONE") +
    scale_colour_viridis_d()+
    facet_grid(.~SITE) +
    theme_bw()
  plot(sub.plot.zone_compare2)

4) plots for site

  # a) substrate type on x axis, site as facets
  sub.plot.site_sub <- ggplot() +
    geom_point(data=sub.summary.site,aes(y=MEAN,x=CLASSCODE, fill=CLASSCODE),colour="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.site,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=CLASSCODE, colour=CLASSCODE),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    facet_grid(SITE~.) +
    theme_bw()
  plot(sub.plot.site_sub)

  # b) site on x axis, substrate type as facets
  sub.plot.site_site <- ggplot() +
    geom_point(data=sub.summary.site,aes(y=MEAN,x=SITE,fill=SITE),color="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.site,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=SITE, colour=SITE),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    facet_grid(CLASSCODE~.) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust=1)
    )
  plot(sub.plot.site_site)

  sub.plot.site_site_div <- ggplot() +
    geom_point(data=sub.summary.site.div,aes(y=MEAN,x=SITE,fill=SITE),color="black",size=3,shape=21, position=position_dodge(width=0.5))+
    geom_errorbar(data=sub.summary.site.div,
                  aes(ymin=MEAN-SE-1e-10,ymax=MEAN+SE,x=SITE, colour=SITE),width=0.1, position=position_dodge(width=0.5))  +
    scale_colour_viridis_d()+
    scale_fill_viridis_d()+
    #facet_grid(CLASSCODE~.) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust=1)
    )
  plot(sub.plot.site_site_div)

PRINT OUT ALL OF THE PLOTS IN A SINGLE PDF.

  pdf(file=paste0(base.dir,"/Plots/Substrate v3.pdf"),onefile=T)
  
  print(sub.plot.site_sub)
  print(sub.plot.site_site)
  plot(sub.plot.site_site_div)
  
  print(sub.plot.zone_sub)
  print(sub.plot.zone_site)
  print(sub.plot.zone_compare1)
  print(sub.plot.zone_compare2)
  print(sub.plot.zone_site_div)
  print(sub.plot.zone_compare1_div)
  
  print(sub.plot.year_sub)
  print(sub.plot.year_site)
  print(sub.plot.year_ts1)
  print(sub.plot.year_ts2)
  print(sub.plot.year_site_div)
  print(sub.plot.year_ts2_div)
  
  print(sub.plot.year.zone_sub)  
  print(sub.plot.year.zone_site)
  print(sub.plot.year.zone_compare1)
  print(sub.plot.year.zone_compare2)
  print(sub.plot.year.zone_site_div)
  print(sub.plot.year.zone_compare2_div)
  
  invisible(dev.off())